{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 209,
   "id": "f4f02e5c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from lxml import etree\n",
    "import selenium\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.common.action_chains import ActionChains\n",
    "import os\n",
    "import time\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "id": "df732e4f",
   "metadata": {},
   "outputs": [],
   "source": [
    "driver = webdriver.Edge()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "7984bc06",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Site root.\n",
    "base_url = 'https://tieba.baidu.com'\n",
    "# Listing page that is opened first; the `kw` query value is percent-encoded.\n",
    "raw_url = 'https://tieba.baidu.com/f?kw=%E5%AD%99%E7%AC%91%E5%B7%9D&fr=index'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "id": "58805387",
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.get(raw_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "id": "8de2e58b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Anchor elements nested in the #thread_list items; li[position()>1] skips the first item.\n",
    "thread_link_xpath = \"//*[@id='thread_list']/li[position()>1]/div/div[2]/div[1]/div[1]/a\"\n",
    "hrefs = driver.find_elements(By.XPATH, thread_link_xpath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "id": "9f0c4e3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visible text of every collected anchor; must run while `hrefs` still holds WebElements.\n",
    "texts = [anchor.text for anchor in hrefs]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "id": "06217195",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace each anchor element in `hrefs` with its `href` URL string.\n",
    "# Entries that are already strings are kept unchanged, so re-running this cell after\n",
    "# `hrefs` has been overwritten no longer raises AttributeError on `str`.\n",
    "hrefs = [i if isinstance(i, str) else i.get_attribute('href') for i in hrefs]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "id": "8dbc0426",
   "metadata": {},
   "outputs": [],
   "source": [
    "driver.get(hrefs[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 220,
   "id": "14a015c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scroll the window down 50 times by 500px, pausing 0.2s after each step\n",
    "# (presumably so content that loads on scroll gets rendered - TODO confirm).\n",
    "scroll_js = 'window.scrollBy(0,500)'\n",
    "for _step in range(50):\n",
    "    driver.execute_script(scroll_js)\n",
    "    time.sleep(0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "id": "0858cde5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Second <div> child of every <cc> element on the current page.\n",
    "p_content = driver.find_elements(by=By.XPATH, value=\"//cc/div[2]\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "id": "14d12092",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visible text of each element matched in `p_content`.\n",
    "p_texts = [post.text for post in p_content]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "id": "58bb8dd9",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['鼠鼠帮朋友代发，黄头是极品饥渴阿姨，以为是男大打算拿下结果气急败坏要开车到学校门口，最后有反转🥰🥰🥰\\n点击展开，查看完整图片',\n",
       " '继续\\n点击展开，查看完整图片\\n\\n点击展开，查看完整图片\\n\\n点击展开，查看完整图片',\n",
       " '给大伙都逗笑了\\n点击展开，查看完整图片\\n\\n点击展开，查看完整图片\\n\\n点击展开，查看完整图片\\n\\n点击展开，查看完整图片',\n",
       " 'gkd',\n",
       " '阿姨那颗寂寞的心给你勾起来了',\n",
       " '后续又来咯\\n点击展开，查看完整图片',\n",
       " '暂时完结了',\n",
       " '后续呢',\n",
       " '跳蛋入脑',\n",
       " '建议直接冲上车一拳撂倒',\n",
       " '快马加编 后续后续',\n",
       " '精彩',\n",
       " '沪爷的攻击性太中了',\n",
       " '',\n",
       " '后边呢',\n",
       " '磨磨唧唧干嘛呢，5点多了xdm还等着呢',\n",
       " '3',\n",
       " '快更',\n",
       " '俩间厕所请问是多少w',\n",
       " '笑死我了',\n",
       " '不会反转床上打架去了吧',\n",
       " '所以，南大什么时候在上海建了个校区？',\n",
       " '佳姗',\n",
       " '3',\n",
       " '不会是这种路虎吧',\n",
       " '',\n",
       " '？',\n",
       " '后续呢？兄弟',\n",
       " 'cy',\n",
       " '1']"
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p_texts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 221,
   "id": "57cce2eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# All <a> elements carrying the CSS class `j_lzl_m`; they are clicked in a later cell.\n",
    "clicker = driver.find_elements(by=By.CSS_SELECTOR, value=\"a.j_lzl_m\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 222,
   "id": "1ffcd8b5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<selenium.webdriver.remote.webelement.WebElement (session=\"4ebefd327d88b06ad93b049b90cb6d27\", element=\"f.401F5941081A419D345EF01352EDE8AB.d.BAF52E5528A67A9DFDC74AB75AA9F658.e.33348\")>,\n",
       " <selenium.webdriver.remote.webelement.WebElement (session=\"4ebefd327d88b06ad93b049b90cb6d27\", element=\"f.401F5941081A419D345EF01352EDE8AB.d.BAF52E5528A67A9DFDC74AB75AA9F658.e.33349\")>,\n",
       " <selenium.webdriver.remote.webelement.WebElement (session=\"4ebefd327d88b06ad93b049b90cb6d27\", element=\"f.401F5941081A419D345EF01352EDE8AB.d.BAF52E5528A67A9DFDC74AB75AA9F658.e.33350\")>,\n",
       " <selenium.webdriver.remote.webelement.WebElement (session=\"4ebefd327d88b06ad93b049b90cb6d27\", element=\"f.401F5941081A419D345EF01352EDE8AB.d.BAF52E5528A67A9DFDC74AB75AA9F658.e.33351\")>,\n",
       " <selenium.webdriver.remote.webelement.WebElement (session=\"4ebefd327d88b06ad93b049b90cb6d27\", element=\"f.401F5941081A419D345EF01352EDE8AB.d.BAF52E5528A67A9DFDC74AB75AA9F658.e.33352\")>,\n",
       " <selenium.webdriver.remote.webelement.WebElement (session=\"4ebefd327d88b06ad93b049b90cb6d27\", element=\"f.401F5941081A419D345EF01352EDE8AB.d.BAF52E5528A67A9DFDC74AB75AA9F658.e.33353\")>,\n",
       " <selenium.webdriver.remote.webelement.WebElement (session=\"4ebefd327d88b06ad93b049b90cb6d27\", element=\"f.401F5941081A419D345EF01352EDE8AB.d.BAF52E5528A67A9DFDC74AB75AA9F658.e.33354\")>,\n",
       " <selenium.webdriver.remote.webelement.WebElement (session=\"4ebefd327d88b06ad93b049b90cb6d27\", element=\"f.401F5941081A419D345EF01352EDE8AB.d.BAF52E5528A67A9DFDC74AB75AA9F658.e.33355\")>,\n",
       " <selenium.webdriver.remote.webelement.WebElement (session=\"4ebefd327d88b06ad93b049b90cb6d27\", element=\"f.401F5941081A419D345EF01352EDE8AB.d.BAF52E5528A67A9DFDC74AB75AA9F658.e.33356\")>,\n",
       " <selenium.webdriver.remote.webelement.WebElement (session=\"4ebefd327d88b06ad93b049b90cb6d27\", element=\"f.401F5941081A419D345EF01352EDE8AB.d.BAF52E5528A67A9DFDC74AB75AA9F658.e.33357\")>]"
      ]
     },
     "execution_count": 222,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clicker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 225,
   "id": "6ef2b1ac",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'x': 306, 'y': 3029}\n",
      "{'x': 306, 'y': 5828}\n",
      "{'x': 306, 'y': 6692}\n",
      "{'x': 313, 'y': 7524}\n",
      "{'x': 306, 'y': 8816}\n",
      "{'x': 306, 'y': 9644}\n",
      "{'x': 299, 'y': 10979}\n",
      "{'x': 312, 'y': 13680}\n",
      "{'x': 299, 'y': 15864}\n",
      "{'x': 299, 'y': 18621}\n"
     ]
    }
   ],
   "source": [
    "# Keep only the anchors that currently show some text; the filter is evaluated once, up front.\n",
    "links_with_text = [link for link in clicker if link.text != '']\n",
    "\n",
    "# For each remaining anchor: scroll it into view, print its page location,\n",
    "# wait 3s, then trigger the click through JavaScript.\n",
    "for link in links_with_text:\n",
    "    driver.execute_script(\"arguments[0].scrollIntoView();\", link)\n",
    "    print(link.location)\n",
    "    time.sleep(3)\n",
    "    driver.execute_script(\"arguments[0].click();\", link)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "id": "5721f256",
   "metadata": {},
   "outputs": [],
   "source": [
    "# <span class='lzl_content_main'> elements inside the <li> items of every <ul class='j_lzl_m_w'>.\n",
    "# Both class tests are exact string matches on the whole class attribute.\n",
    "comment_span_xpath = \"//ul[@class='j_lzl_m_w']//li//span[@class='lzl_content_main']\"\n",
    "p_comment = driver.find_elements(By.XPATH, comment_span_xpath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "id": "87cacdd8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['回复 比亚迪114514 :上桌吃饭导致的',\n",
       " '我在这澄清一下，我们烤批攻击力就是这么强',\n",
       " '老p找新棍',\n",
       " '路虎有盛世这个型号吗',\n",
       " '揽胜吧',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " 'gkd，等后续',\n",
       " '看到“真192吗”没绷住',\n",
       " '@春山折 哈哈哈哈',\n",
       " '回复 小糸侑☁️ :《上车》',\n",
       " '她以为楼主的雕有192！',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '回复 苏廷牧 :刚才已经被阿姨发现是女生了😋😋😋气急败坏',\n",
       " '回复 比亚迪114514 :给阿姨来一点儿青春的味道属于是',\n",
       " '回复 比亚迪114514 :我喜欢你',\n",
       " '回复 逆旅居士✔️ :你也开着路虎来接我么',\n",
       " '回复 比亚迪114514 :只要老婆喜欢，我可以买',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '3',\n",
       " '3',\n",
       " '3',\n",
       " '3',\n",
       " '3',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '好杀',\n",
       " '',\n",
       " '以前我看吧友说“你直接上去扣，她受不了的”，我是不信的，现在嘛...',\n",
       " '能不能把阿姨推给我心疼阿姨',\n",
       " '3',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " 'gkd',\n",
       " '快更新上课等着看呢。',\n",
       " '让我来',\n",
       " '笑死我了',\n",
       " '攻击力我是认可的',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '小蒂代替思考',\n",
       " '3',\n",
       " '3',\n",
       " '3',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '3',\n",
       " '3',\n",
       " '3',\n",
       " '3',\n",
       " '3',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '更完了兄弟们',\n",
       " '3',\n",
       " '3',\n",
       " '40捏，在上海那个地界儿还是保守了',\n",
       " '回复 长寿秘诀是谎报年龄 :朋友市中心的，确实有实力😭😭😭羡慕',\n",
       " '1平10w起步，自己算吧',\n",
       " '回复 比亚迪114514 :所以你那个朋友也是个女的？',\n",
       " '回复 智慧魔星X :是的',\n",
       " '楼主朋友明显女的啊',\n",
       " '老头乐进化太快了，以后都可以出半挂老头乐了',\n",
       " '也不知道这byd到底是不是真开车到学校门口了',\n",
       " '回复 SVD :至少现在已经出到皮卡老头乐了，前几天去山东时候见过',\n",
       " '老头乐房车都出了',\n",
       " '回复 吃一个可爱多 :我昨天在我们这见着了，我还楞半天',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '',\n",
       " '']"
      ]
     },
     "execution_count": 175,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[i.text for i in p_comment]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "id": "684dda61",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'p_props_tail props_appraise_wrap'"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# `p_comment` holds Selenium WebElements, which have no lxml-style `.attrib` mapping;\n",
    "# read the element's class attribute through the WebElement API instead.\n",
    "p_comment[5].get_attribute('class')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
